In [1]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.express as px
In [2]:
dataElecGenByFuel = pd.read_csv("C:/Users/kyled/OneDrive/Documents/Georgian/F22/CSVFiles/Elec_generation_by_fuel.csv", sep=",")
In [3]:
elecGenByFuel_df1 = pd.DataFrame(dataElecGenByFuel).set_index('Country').replace({'-':None}).replace({'^':None})
In [4]:
dataGenByYear = pd.read_csv("C:/Users/kyled/OneDrive/Documents/Georgian/F22/CSVFiles/countries-years/ElectricityGeneration-TWH.csv", sep=",")
In [55]:
import numpy as np
import pandas as pd

import plotly.offline as py
import plotly.graph_objs as go
py.init_notebook_mode()

dataGenByYear = pd.read_csv("C:/Users/kyled/OneDrive/Documents/Georgian/F22/CSVFiles/countries-years/ElectricityGeneration-TWH.csv", sep=",")

GenByYear_df1 = pd.DataFrame(dataGenByYear).rename(columns=dataGenByYear.iloc[1]).set_index('Country').dropna().drop(columns=['2021', '2011-21','2021']).replace({'-':None}).replace({'^':None}).drop(index=["Country"])
GenByYear_df1=GenByYear_df1.transpose()

py.iplot([{
    'x': GenByYear_df1.index,
    'y': GenByYear_df1[col],
    'name': col
}  for col in GenByYear_df1.columns], filename='cufflinks/multiple-lines-on-same-chart')
In [33]:
 
In [36]:
 
Out[36]:
1985 1986 1987 1988 1989 1990.0 1991.0 1992.0 1993.0 1994.0 ... 2012.0 2013.0 2014.0 2015.0 2016.0 2017.0 2018.0 2019.0 2020.0 2021.0
Country
Canada 459 468.6 496.4 506 498.1 480.6 507.0 518.6 530.2 553.1 ... 636.5 655.7 647.6 659.3 663.7 664.5 655.8 650.8 649.1 641.0
Mexico 96.2 99.6 106.2 112.2 120 117.6 128.6 132.3 134.6 146.1 ... 296.4 297.1 303.3 310.3 319.4 325.1 335.1 344.6 325.7 336.0
US 2657.2 2676.1 2772.2 2914.4 3155.4 3232.8 3270.7 3284.4 3404.7 3458.5 ... 4310.6 4330.3 4363.3 4348.7 4347.9 4302.5 4461.6 4411.2 4284.8 4406.4
Total North America 3212.4 3244.3 3374.8 3532.7 3773.6 3831.0 3906.3 3935.3 4069.6 4157.7 ... 5243.5 5283.1 5314.2 5318.4 5331.1 5292.2 5452.5 5406.5 5259.7 5383.5
Argentina 45.3 49 52.1 52.5 50.9 51.0 53.9 56.1 61.9 65.6 ... 136.0 139.4 138.6 145.4 147.2 145.6 146.8 139.5 144.6 152.5
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
Total Asia Pacific 1723.9 1815.7 1971.4 2123.8 2287.7 2469.2 2635.8 2780.2 2918.6 3176.0 ... 9278.3 9815.0 10335.8 10440.9 10951.1 11575.6 12372.4 12783.7 12949.3 13994.4
Total World 9883.2 10178 10667.9 11138 11657 11961.1 12222.7 12335.9 12599.4 12923.8 ... 22817.5 23452.4 24049.8 24292.0 24924.2 25647.7 26677.3 27036.6 26889.2 28466.3
of which: OECD 6560.3 6667 6948.7 7232.1 7605.3 7812.1 7986.6 8047.5 8229.2 8451.7 ... 11022.8 11014.8 10955.2 11004.1 11082.2 11124.4 11310.9 11191.3 10900.7 11210.2
Non-OECD 3322.8 3511 3719.2 3905.9 4051.7 4149.0 4236.1 4288.4 4370.2 4472.1 ... 11794.7 12437.7 13094.7 13287.9 13842.0 14523.4 15366.4 15845.3 15988.5 17256.1
European Union # 2022.1 2075.4 2147.4 2197.5 2248.4 2274.1 2317.7 2303.4 2303.8 2339.2 ... 2931.4 2912.1 2849.3 2897.9 2919.1 2951.4 2935.3 2894.0 2779.0 2895.3

102 rows × 37 columns

In [61]:
dataBioGenByYear = pd.read_csv("C:/Users/kyled/OneDrive/Documents/Georgian/F22/CSVFiles/countries-years/BiofuelsProduction-PJ.csv", sep=",")
BioGenByYear_df1 = pd.DataFrame(dataBioGenByYear).rename(columns=dataBioGenByYear.iloc[1]).set_index('Country').dropna().drop(columns=['2021', '2011-21','2021']).replace({'-':None}).replace({'^':None}).drop(index=["Country"])
frames = [GenByYear_df1, BioGenByYear_df1]
result = pd.concat(frames, keys=["ElectricityGenerationTWH", "BiofuelsProductionPJ"])
result
Out[61]:
Canada Mexico US Total North America Argentina Brazil Chile Colombia Ecuador Peru ... 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020
ElectricityGenerationTWH 1985 459 96.2 2657.2 3212.4 45.3 193.7 14 29.4 4.6 12.1 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1986 468.6 99.6 2676.1 3244.3 49 202.1 14.8 31.7 5 12.9 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1987 496.4 106.2 2772.2 3374.8 52.1 203.3 15.6 33.7 5.4 14 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1988 506 112.2 2914.4 3532.7 52.5 215 16.9 35.5 5.6 13.6 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1989 498.1 120 3155.4 3773.6 50.9 221.7 17.8 37 5.8 13.7 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
BiofuelsProductionPJ Asia Pacific NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 161.6 204.3 248.6 308.7 227.5 288 287.3 403.4 529.7 515.4
Total World NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 841.6 915.5 1024.7 1167.2 1068.3 1191.2 1282.7 1493.1 1628.2 1595.5
of which: OECD NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 511.7 542.4 612.9 661 656.1 686.7 752.4 835 832.7 831.4
Non-OECD NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 329.9 373.1 411.8 506.1 412.3 504.6 530.3 658.1 795.5 764.1
European Union NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 347.5 371 395.7 449.8 445.6 429.9 489.3 525.5 542.2 533.3

101 rows × 133 columns

In [10]:
fmask = 'C:/Users/kyled/OneDrive/Documents/Georgian/F22/CSVFiles/countries-years/*.csv*'

DataFrames={}
fileNames={}
for f in glob.glob(fmask):
    fileNames[f] = f
    data = pd.read_csv(f, sep=',', encoding= 'unicode_escape')
    DataFrames[f] = pd.DataFrame(data).rename(columns=data.iloc[1]).set_index('Country').dropna().replace({'-':0}).replace({'^':.005}).replace({'?':.05}).drop(index=["Country"])
    

fnStringArray = []
for fn in fileNames: 
    fnStringArray.append(str(fn.replace("C:/Users/kyled/OneDrive/Documents/Georgian/F22/CSVFiles/countries-years\\", "")))
    
dfValueArray = list(DataFrames.values())

result = pd.DataFrame({'idx':fnStringArray, 'dfs':dfValueArray})

n=-1
for r in  result['dfs']:
    n+=1
    print(n)
    with pd.ExcelWriter('C:/Users/kyled/OneDrive/Documents/Georgian/F22/IntroToDataAnalytics-COMP2099/final/cleanedData.xlsx', mode="a", engine="openpyxl",if_sheet_exists="replace",) as writer:
        result['dfs'].iloc[n].to_excel(writer, sheet_name=result['idx'].iloc[n]) 
    
0
C:\Users\kyled\anaconda3\lib\site-packages\openpyxl\workbook\child.py:99: UserWarning: Title is more than 31 characters. Some applications may not be able to read the file
  warnings.warn("Title is more than 31 characters. Some applications may not be able to read the file")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
In [62]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.express as px


headMapData = pd.read_csv(r'C:\Users\kyled\Downloads\canada.2002-2008.monthlyAvgGHI.csv', sep=',', encoding= 'unicode_escape', low_memory=False)
heatMapDataFrame = pd.DataFrame(headMapData)
heatMapDataFrame

fig = px.density_mapbox(heatMapDataFrame, lat='latitude', lon='longitude', z='Avg Global', radius=1,
                        center=dict(lat=52.75, lon=-90), zoom=2,
                        mapbox_style="stamen-terrain")
fig.show()
In [ ]:
 
In [ ]: